In [1]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import glob
import os
Find filenames for all .xls and .xlsx files in the wind generation subfolder
In [7]:
# Relative path of the folder that holds the hourly wind generation workbooks.
path = 'ERCOT/Hourly wind generation'

# One glob pattern per Excel extension.
full_xls, full_xlsx = (os.path.join(path, ext) for ext in ('*.xls', '*.xlsx'))

# All .xls matches first, followed by all .xlsx matches, in one list.
files = glob.glob(full_xls) + glob.glob(full_xlsx)
In [17]:
# Column labels to keep from the combined data, in display order.
cols = [
    'ERCOT Load, MW',
    'Total Wind Installed, MW',
    'Total Wind Output, MW',
    'Wind Output, % of Installed',
    'Wind Output, % of Load',
    '1-hr MW change',
    '1-hr % change',
]
In [22]:
# Read the 'numbers' sheet of every workbook in `files` and stack the results into
# a single frame; the first column of each sheet becomes the index.
# `sheet_name` is the keyword pandas.read_excel uses to pick a sheet. The earlier
# `sn=` is not a read_excel parameter and raises TypeError on current pandas.
df = pd.concat([pd.read_excel(fn, sheet_name='numbers', index_col=0) for fn in files])
In [23]:
# Order the rows by index. Rebinding `df` instead of sorting in place avoids
# mutating a frame that an earlier cell may already have displayed.
df = df.sort_index()
In [24]:
# Preview the first five rows of the combined, sorted frame.
df.head(n=5)
Out[24]:
In [25]:
# Keep every row but only the columns listed in `cols`, in that order.
df = df.loc[:,cols]
In [26]:
# Preview the first five rows after the column selection.
df.head(n=5)
Out[26]:
In [27]:
# Line plot of installed wind capacity against the frame's index
# (the index is the x-axis by default).
df.plot.line(y='Total Wind Installed, MW')
Out[27]:
Not sure why, but there is one bad row at the end. Removing it in the next cell.
In [28]:
# Drop the final row (all columns are kept). This is positional, so running the
# cell again removes one more row each time.
df = df.head(-1)
In [29]:
# Re-draw installed wind capacity against the index now that the last row is gone.
df.plot.line(y='Total Wind Installed, MW')
Out[29]:
In [34]:
# Line plot of ERCOT load against the frame's index
# (the index is the x-axis by default).
df.plot.line(y='ERCOT Load, MW')
Out[34]:
Looks like there is one data point from 2016 at the end of the combined data. I'm going to remove it.
In [39]:
# Point plot of ERCOT load grouped by the calendar year of the index.
# x and y are passed by keyword: current seaborn treats the first positional
# argument as `data` and rejects further positionals. `aspect` is a catplot
# option, not a pointplot one, so the 1.5:1 shape is set on the figure instead.
fig, ax = plt.subplots(figsize=(9, 6))
sns.pointplot(x=df.index.year, y=df['ERCOT Load, MW'], ax=ax)
ax.set(xlabel='Year', ylabel='ERCOT Load, MW')
ax.set_title('ERCOT demand by year')
Out[39]:
In [41]:
# Remove the stray 2016 observation by filtering on the index year rather than
# by position: a positional `iloc[:-1]` would delete a further (valid) row every
# time this cell is re-run, whereas this filter is idempotent.
# NOTE(review): assumes the data is meant to end with 2015 — confirm before
# adding 2016 source files.
df = df.loc[df.index.year != 2016]
In [42]:
# Point plot of ERCOT load grouped by the calendar year of the index, redrawn
# after the preceding row removal.
# x and y are passed by keyword: current seaborn treats the first positional
# argument as `data` and rejects further positionals. `aspect` is a catplot
# option, not a pointplot one, so the 1.5:1 shape is set on the figure instead.
fig, ax = plt.subplots(figsize=(9, 6))
sns.pointplot(x=df.index.year, y=df['ERCOT Load, MW'], ax=ax)
ax.set(xlabel='Year', ylabel='ERCOT Load, MW')
ax.set_title('ERCOT demand by year')
Out[42]:
In [40]:
# Point plot of ERCOT load grouped by the calendar month of the index.
# x and y are passed by keyword: current seaborn treats the first positional
# argument as `data` and rejects further positionals. `aspect` is a catplot
# option, not a pointplot one, so the 1.5:1 shape is set on the figure instead.
fig, ax = plt.subplots(figsize=(9, 6))
sns.pointplot(x=df.index.month, y=df['ERCOT Load, MW'], ax=ax)
ax.set(xlabel='Month', ylabel='ERCOT Load, MW')
ax.set_title('ERCOT demand by month')
Out[40]:
In [43]:
# Destination for the cleaned CSV: output folder first, then the file name.
# Note that `path` is rebound here and no longer points at the input folder.
path = '../Clean Data'
filename = 'ERCOT wind data.csv'
fullpath = os.path.join(path, filename)
In [45]:
# Write the cleaned frame, index included, to the CSV at `fullpath`.
df.to_csv(path_or_buf=fullpath)